Figure 1 will have two panes. The first pane (Fig. 1a) will show the total number of publications over time. The second pane (Fig. 1b) will have three lines (ecology, governance, and socioeconomics) showing the frequency of each type of publication over time. Note that the three categories in pane 2 (Fig. 1b) will not add up to the total publications in pane 1 (Fig. 1a).
Note: the data contains ‘orphans’.
# Load the working publication dataset from the project's data/ folder
mydata <- here("data", "Data_total_working_for_r.csv") %>%
  read_csv()
Subset to the year and co-location type columns, then count the number of publications per year.
# Yearly publication totals: keep the columns of interest, then tally the
# number of rows per year into a `count` column
data_sub <- mydata %>%
  select(first_author, year, lme, ecology, governance, socioecon) %>%
  count(year, name = "count")
# Pane 1 (Fig. 1a)
# Line chart of the yearly publication count held in data_sub
g1 <- ggplot(data_sub, aes(x = year, y = count)) +
  geom_line(size = 0.7, color = "darkgray") +
  labs(x = "Year", y = "Count") +
  theme_minimal() +
  # tilt the year labels on the x axis
  theme(axis.text.x = element_text(angle = 45))

# Interactive version of the pane, so individual values can be inspected
ggplotly(g1)
This might be semi-sloppy code, and there may be a cleaner way to do this, but I’m not sure what that would be, and I feel confident in the messy code I’ve written here.
# Rearrange the data for pane 2 (Fig. 1b).
# For each discipline (ecology, governance, socioecon): total its column per
# year, then tag the yearly totals with a discipline label. The three labelled
# tables are stacked into one long data frame (year / discipline / count) so
# that ggplot can draw one line per discipline.
#
# The label is added with mutate(), so it always has one entry per year that
# is present in the data (no fixed row count to keep in sync), and year/count
# keep their own column types instead of passing through a character matrix.

# ecology section: yearly totals, then label
data_sub_b_ecol <- mydata %>%
  select(first_author, year, lme, ecology) %>%
  group_by(year) %>%
  summarize(count = sum(ecology))

ecol_df <- data_sub_b_ecol %>%
  mutate(discipline = "Ecology") %>%
  select(year, discipline, count)

# same thing for socioeconomics section
data_sub_b_socio <- mydata %>%
  select(first_author, year, lme, socioecon) %>%
  group_by(year) %>%
  summarize(count = sum(socioecon))

socio_df <- data_sub_b_socio %>%
  mutate(discipline = "Socioeconomics") %>%
  select(year, discipline, count)

# same thing for governance section
data_sub_b_gov <- mydata %>%
  select(first_author, year, lme, governance) %>%
  group_by(year) %>%
  summarize(count = sum(governance))

gov_df <- data_sub_b_gov %>%
  mutate(discipline = "Governance") %>%
  select(year, discipline, count)

# stack the 3 data frames into one (order: ecology, governance, socioeconomics)
all_df <- bind_rows(ecol_df, gov_df, socio_df) %>%
  mutate(year = as.numeric(year),
         count = as.integer(count)) %>%
  # keep the result a plain data frame rather than a tibble
  as.data.frame()
Using ggplotly() here so that we can inspect individual values when needed.
# Pane 2 (Fig. 1b)
# One line per discipline, showing the yearly counts held in all_df
g2 <- ggplot(all_df, aes(x = year, y = count)) +
  geom_line(aes(color = discipline), size = 0.7) +
  labs(x = "Year", y = "Count") +
  theme_minimal() +
  # untitled legend underneath the plot
  theme(legend.title = element_blank(), legend.position = 'bottom') +
  # tilt the year labels on the x axis
  theme(axis.text.x = element_text(angle = 45)) +
  # fixed colour for each discipline
  scale_color_manual(
    values = c(
      "Ecology" = "#00BA38",
      "Governance" = "#619CFF",
      "Socioeconomics" = "#F8766D"
    )
  )

# Interactive version of the pane
ggplotly(g2)
Using cowplot to combine panes 1 and 2 into a single figure.
# Place the two panes side by side, labelled "A" and "B"
plot_grid(plotlist = list(g1, g2), labels = c("A", "B"))
Figure 1. Frequency of publications over time (years 2000-2020). Pane A shows the total frequency of co-location publications over time. Pane B shows the frequency of co-location publications over time, separated by publication discipline (i.e., ecology, governance, or socioeconomics).